reqpkg <- c("ggplot2","plotly","DESeq2","magrittr","dplyr","genefilter","AnnotationHub","org.Mm.eg.db","GO.db","vsn","pheatmap","biomaRt","curl","RColorBrewer","viridis","fgsea","tidyverse","DT","ggpubr")
for (i in reqpkg) {
  suppressWarnings(suppressMessages(library(i, quietly=T, verbose=T, warn.conflicts=F,character.only=T)))
  print(i)
}
## [1] "ggplot2"
## [1] "plotly"
## [1] "DESeq2"
## [1] "magrittr"
## [1] "dplyr"
## [1] "genefilter"
## [1] "AnnotationHub"
## [1] "org.Mm.eg.db"
## [1] "GO.db"
## [1] "vsn"
## [1] "pheatmap"
## [1] "biomaRt"
## [1] "curl"
## [1] "RColorBrewer"
## [1] "viridis"
## [1] "fgsea"
## [1] "tidyverse"
## [1] "DT"
## [1] "ggpubr"
theme_set(theme_pubr())
select <- dplyr::select
ensembl <- useMart("ensembl")
ensemblMouse <- useDataset("mmusculus_gene_ensembl", mart=ensembl)
mouseProteinCodingGenes <- getBM(
  attributes=c("ensembl_gene_id","external_gene_name","description"), 
  filters='biotype', 
  values=c('protein_coding'), 
  mart=ensemblMouse)

DESeq

df <- read.csv("combatSeq_corrected_data.csv") %>%
  .[rowSums(sapply(., '%in%', mouseProteinCodingGenes$external_gene_name)) > 0,] %>% 
  set_rownames(.$X) %>% 
  select(-X) %>% 
  select(c(-MF.13,-MF.17,-MF.23))

sex_list <- c(rep("female", 15), rep("male", 14))
cond_list <- c(rep("12wConv", 4), rep("4wConv", 4), rep("GF", 3), rep("SPF", 4), rep("12wConv", 3), rep("4wConv", 3), rep("GF", 4), rep("SPF", 4))
condition_list <- data.frame(row.names=colnames(df), Sex=sex_list, Condition=cond_list)

condition_list$Sex <- relevel(condition_list$Sex, "female")
condition_list$Condition <- relevel(condition_list$Condition, "SPF")

dds <- DESeq2::DESeqDataSetFromMatrix(countData=df,
                                      colData = condition_list,
                                      design = ~ Sex + Condition + Sex:Condition)
head(dds)
## class: DESeqDataSet 
## dim: 6 29 
## metadata(1): version
## assays(1): counts
## rownames(6): Rp1 Sox17 ... Gm37988 Tcea1
## rowData names(0):
## colnames(29): MF.20 MF.24 ... MF.11 MF.12
## colData names(2): Sex Condition
dds <- estimateSizeFactors(dds)
colData(dds)
## DataFrame with 29 rows and 3 columns
##            Sex Condition        sizeFactor
##       <factor>  <factor>         <numeric>
## MF.20   female   12wConv  1.08104749069605
## MF.24   female   12wConv   1.1524481517802
## MF.1    female   12wConv 0.948474051294536
## MF.3    female   12wConv  0.93478546312708
## MF.19   female    4wConv  1.10897808920126
## ...        ...       ...               ...
## MF.9      male        GF 0.926910147270914
## MF.27     male       SPF  1.10699861761705
## MF.28     male       SPF  1.08840541444753
## MF.11     male       SPF 0.971416269261023
## MF.12     male       SPF 0.922861141150383
dds$Group <- factor(paste0(dds$Sex, dds$Condition), levels = c("femaleSPF", "female12wConv", "female4wConv", "femaleGF", "maleSPF", "male12wConv", "male4wConv", "maleGF"))
design(dds) <- ~  Group
vsd <- vst(dds, blind = TRUE)
head(assay(vsd), 3)
##           MF.20     MF.24     MF.1     MF.3    MF.19    MF.21     MF.2     MF.4
## Rp1    6.483520  6.845312 6.658565 6.660960 6.833048 6.839088 6.963209 6.948694
## Sox17  8.684651  8.327021 8.215158 8.540489 8.040992 8.179754 8.384219 8.055689
## Mrpl15 9.766815 10.097341 9.929943 9.899744 9.852936 9.983695 9.854249 9.930592
##            MF.29     MF.30    MF.14    MF.31     MF.32    MF.15    MF.16
## Rp1     6.328831  6.857582 6.704921 6.683618  6.601405 6.567786 6.494626
## Sox17   8.674499  8.485366 8.377268 8.052071  8.026195 8.124486 8.147617
## Mrpl15 10.071977 10.247321 9.989980 9.889346 10.043617 9.877673 9.893376
##           MF.22     MF.6     MF.8    MF.18     MF.5     MF.7    MF.25    MF.26
## Rp1    6.760564 7.005746 6.697136 6.854662 6.975232 6.730715 6.671619 6.900810
## Sox17  8.114032 8.370795 8.289486 8.444027 7.990524 8.364131 8.331712 8.693452
## Mrpl15 9.725629 9.845922 9.816283 9.896309 9.827274 9.877667 9.860833 9.907690
##           MF.10     MF.9    MF.27     MF.28    MF.11    MF.12
## Rp1    6.667650 6.799475 6.810229  6.959737 6.842398 6.702335
## Sox17  8.347725 8.371263 8.205323  8.027686 8.483256 8.521549
## Mrpl15 9.853993 9.911690 9.685097 10.097632 9.978403 9.933225
colData(vsd)
## DataFrame with 29 rows and 4 columns
##            Sex Condition        sizeFactor         Group
##       <factor>  <factor>         <numeric>      <factor>
## MF.20   female   12wConv  1.08104749069605 female12wConv
## MF.24   female   12wConv   1.1524481517802 female12wConv
## MF.1    female   12wConv 0.948474051294536 female12wConv
## MF.3    female   12wConv  0.93478546312708 female12wConv
## MF.19   female    4wConv  1.10897808920126  female4wConv
## ...        ...       ...               ...           ...
## MF.9      male        GF 0.926910147270914        maleGF
## MF.27     male       SPF  1.10699861761705       maleSPF
## MF.28     male       SPF  1.08840541444753       maleSPF
## MF.11     male       SPF 0.971416269261023       maleSPF
## MF.12     male       SPF 0.922861141150383       maleSPF
rld <- rlog(dds, blind=FALSE)
ntd <- normTransform(dds)
meanSdPlot(assay(ntd))

meanSdPlot(assay(vsd))

meanSdPlot(assay(rld))

vst(dds[,1:15]) %>% plotPCA(intgroup=c("Sex","Condition")) + ggtitle("vst, female only")

vst(dds[,16:29]) %>% plotPCA(intgroup=c("Sex","Condition")) + ggtitle("vst, male only")

plotPCA(vsd, intgroup=c("Sex","Condition")) + ggtitle("vst")

plotPCA(vsd, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1)

plotPCA(ntd, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1) + ggtitle("nld")

plotPCA(rld, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1) + ggtitle("rld")

dds <- DESeq(dds)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
#Compare SPF v GF in F
res_1 <- results(dds,contrast=c("Group", "femaleSPF", "femaleGF"), tidy = TRUE)
#Compare 4w vs 12w in F
res_2 <- results(dds, contrast=c("Group", "female4wConv", "female12wConv"), tidy = TRUE)
#Compare SPF vs 4w in F
res_3 <- results(dds, contrast=c("Group", "femaleSPF", "female4wConv"), tidy = TRUE)
#Compare SPF vs 12w in F
res_4 <- results(dds, contrast=c("Group", "femaleSPF", "female12wConv"), tidy = TRUE)
#Compare SPF v GF in M
res_5 <- results(dds,contrast=c("Group", "maleSPF", "maleGF"), tidy = TRUE)
#Compare 4w vs 12w in M
res_6 <- results(dds, contrast=c("Group", "male4wConv", "male12wConv"), tidy = TRUE)
#Compare SPF vs 4w in M
res_7 <- results(dds, contrast=c("Group", "maleSPF", "male4wConv"), tidy = TRUE)
#Compare SPF vs 12w in M
res_8 <- results(dds, contrast=c("Group", "maleSPF", "male12wConv"), tidy = TRUE)
sum(res_1$pvalue < 0.05, na.rm=TRUE)
## [1] 3738
sum(!is.na(res_1$pvalue))
## [1] 18757
sum(res_1$padj < 0.1, na.rm=TRUE)
## [1] 2321
sum(res_2$pvalue < 0.05, na.rm=TRUE)
## [1] 575
sum(!is.na(res_2$pvalue))
## [1] 18757
sum(res_2$padj < 0.1, na.rm=TRUE)
## [1] 4
sum(res_3$pvalue < 0.05, na.rm=TRUE)
## [1] 3555
sum(!is.na(res_3$pvalue))
## [1] 18757
sum(res_3$padj < 0.1, na.rm=TRUE)
## [1] 1964
sum(res_4$pvalue < 0.05, na.rm=TRUE)
## [1] 3261
sum(!is.na(res_4$pvalue))
## [1] 18757
sum(res_4$padj < 0.1, na.rm=TRUE)
## [1] 1739
sum(res_5$pvalue < 0.05, na.rm=TRUE)
## [1] 3012
sum(!is.na(res_5$pvalue))
## [1] 18757
sum(res_5$padj < 0.1, na.rm=TRUE)
## [1] 1280
sum(res_6$pvalue < 0.05, na.rm=TRUE)
## [1] 642
sum(!is.na(res_6$pvalue))
## [1] 18757
sum(res_6$padj < 0.1, na.rm=TRUE)
## [1] 0
sum(res_7$pvalue < 0.05, na.rm=TRUE)
## [1] 1833
sum(!is.na(res_7$pvalue))
## [1] 18757
sum(res_7$padj < 0.1, na.rm=TRUE)
## [1] 358
sum(res_8$pvalue < 0.05, na.rm=TRUE)
## [1] 1944
sum(!is.na(res_8$pvalue))
## [1] 18757
sum(res_8$padj < 0.1, na.rm=TRUE)
## [1] 357
resultsNames(dds)
## [1] "Intercept"                        "Group_female12wConv_vs_femaleSPF"
## [3] "Group_female4wConv_vs_femaleSPF"  "Group_femaleGF_vs_femaleSPF"     
## [5] "Group_maleSPF_vs_femaleSPF"       "Group_male12wConv_vs_femaleSPF"  
## [7] "Group_male4wConv_vs_femaleSPF"    "Group_maleGF_vs_femaleSPF"
condition_LFC <- lfcShrink(dds, coef = "Group_female12wConv_vs_femaleSPF", type = 'apeglm')
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
DESeq2::plotMA(condition_LFC, ylim=c(-2,2))

topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 50)
mat  <- assay(vsd)[ topVarGenes, ]
mat  <- mat - rowMeans(mat)
anno <- as.data.frame(colData(vsd)[, c("Sex","Condition")])
pheatmap(mat, color = inferno(length(mat) - 1), annotation_col = anno, cluster_cols = F)

pheatmap(mat, annotation_col = anno)

pheatmap(mat, color = inferno(length(mat) - 1), annotation_col = anno)

fGSEA

pathway_folder <- 'pathways/'

fgsea_output <- list()
ranks <- list()

mart <- useDataset("mmusculus_gene_ensembl", mart=useMart("ensembl"))
bm <- getBM(attributes=c("external_gene_name", "hsapiens_homolog_associated_gene_name"), mart=mart) %>%
  distinct() %>%
  as_tibble() %>%
  na_if("") %>%
  na.omit()

fGSEA_pipe <- function(res, path, title, pres=FALSE) {
  res2 <- inner_join(res_1, bm, by=c("row"="external_gene_name")) %>%
    select(hsapiens_homolog_associated_gene_name, stat) %>%
    na.omit() %>%
    distinct() %>%
    group_by(hsapiens_homolog_associated_gene_name) %>%
    summarize(stat=mean(stat))
  
  ranks <- deframe(res2)
  if(pres) head(ranks, 20)
  
  if(path == "hallmark") p <- "h.all.v7.0.symbols.gmt"
  else if(path == "KEGG") p <- "c2.cp.kegg.v7.0.symbols.gmt"
  else if(path == "mir") p <- "c3.mir.v7.0.symbols.gmt"
  else if(path == "GO_all") p <- "c5.all.v7.0.symbols.gmt"
  else if(path == "GO_BP") p <- "c5.bp.v7.0.symbols.gmt"
  
  fgseaResTidy <- fgsea(pathways=gmtPathways(paste0(pathway_folder, p)), stats=ranks, nperm=100000) %>%
    as_tibble() %>%
    filter(padj < 0.05) %>%
    arrange(desc(NES))
  
  p <- ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
    geom_col() +
    coord_flip() +
    labs(x="Pathway", y="Normalized Enrichment Score",
         title=paste0(title, ": ", path, " pathways NES from GSEA")) +
    theme_minimal()
  return(p)
}
fGSEA_pipe(res_1, "hallmark", "Female SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_1, "KEGG", "Female SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_1, "GO_BP", "Female SPF vs GF") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_2, "hallmark", "Female 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_2, "KEGG", "Female 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_2, "GO_BP", "Female 4w vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_3, "hallmark", "Female SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_3, "KEGG", "Female SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_3, "GO_BP", "Female SPF vs 4w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_4, "hallmark", "Female SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_4, "KEGG", "Female SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_4, "GO_BP", "Female SPF vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_5, "hallmark", "Male SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_5, "KEGG", "Male SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_5, "GO_BP", "Male SPF vs GF") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_6, "hallmark", "Male 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_6, "KEGG", "Male 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_6, "GO_BP", "Male 4w vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_7, "hallmark", "Male SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_7, "KEGG", "Male SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_7, "GO_BP", "Male SPF vs 4w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_8, "hallmark", "Male SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

fGSEA_pipe(res_8, "KEGG", "Male SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

p <- fGSEA_pipe(res_8, "GO_BP", "Male SPF vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
 

By Sumeed Yoyo Manzoor

smanzoor@uchicago.edu